/* Appendix Table 1 */



#delimit ;
clear all;

local outfile "GWgapr10_desc";
set more off;






*******************************************************************************;
* Reading in and preparing firm-level data;
*******************************************************************************;

use if Q_rest==1 using GWgap_pr_firm_v4, clear; 

gen go = go_real/1000000;
label var go "Gross Output ($000,000)";
gen WB = exp(lnWB)/1000000;
label var WB "Wage bill ($000,000)";
sum go WB;
gen K = K_real/1000000;
label var K "Capital ($000,000)";
gen M = M_real/1000000;
label var M "Intermediate consumption ($000,000)";


* grr-ing all contributing employee counts;

gen orig_L_hc_t = L_hc_t;
label var orig_L_hc_t "Unconfidentialised L_hc_t";
unab L_hc_vars: L_hc_*;
di "`L_hc_vars'";
sum `L_hc_vars';

/* This confidentialises data;
foreach var in `L_hc_vars' {;
	grr `var', base(0);
	drop `var';
	rename grr0`var' `var';
};
*/

gen s_hc_f = L_hc_f/L_hc_t;
label var s_hc_f "Share of head count female";
gen s_fte_f = L_fte_f/L_fte_t;
label var s_fte_f "Share of FTEs female";

foreach m in hc {;
	foreach age in lt25 25to39 40to54 55p {;
		gen s_`m'_`age' = L_`m'_`age'/L_hc_t;
	};
	foreach ten in ten0 ten1 ten2 {;
		gen s_`m'_`ten' = L_`m'_`ten'/L_hc_t;
	};
};
foreach m in fte {;
	foreach age in lt25 25to39 40to54 55p {;
		gen s_`m'_`age' = L_hc_`age'*av_fte_`age'/L_hc_t;
		gen L_fte_`age' = L_hc_`age'*av_fte_`age';
	};
	foreach ten in ten0 ten1 ten2 {;
		gen s_`m'_`ten' = L_hc_`ten'*av_fte_`ten'/L_hc_t;
		gen L_fte_`ten' = L_hc_`ten'*av_fte_`ten';
	};
};

gen WPc_WP_cat_0 = 1 - (WPc_WP_cat_1 + WPc_WP_cat_2 + WPc_WP_cat_3 + WPc_WP_cat_4);


* vars to get descriptives on;
	
local dvars "go WB K M 
	L_hc_t s_hc_f 
	s_hc_lt25 s_hc_25to39 s_hc_40to54 s_hc_55p
	L_fte_t L_fte_m L_fte_f s_fte_f
	WPc_WP_cat_0 WPc_WP_cat_1 WPc_WP_cat_2 WPc_WP_cat_3 WPc_WP_cat_4
	s_hc_ten0 s_hc_ten1 s_hc_ten2
	ffe_m ffe_f";

des `dvars', full;




*******************************************************************************;
* Making unweighted and weighted firm-level descriptives; 
*******************************************************************************;


local obs: word count `dvars';
di `obs';

matrix define des = J(`obs',6,.);
local row = 1;




foreach var in `dvars' {;

	* unweighted averages;
	sum `var';
	matrix des[`row',1] = r(mean);
	matrix des[`row',2] = r(sd);
	
	* weighted averages;
	sum `var' [w=orig_L_hc];
	matrix des[`row',5] = r(mean);
	matrix des[`row',6] = r(sd);
	
	egen pentgp = group(pent) if r(sample);
	qui sum pentgp;
	local num_pents = r(max);
	matrix des[`row',3] = `num_pents';
	
	qui sum year if pentgp<.;
	local num_pent_yr = r(N);
	matrix des[`row',4] = `num_pent_yr';
	
	drop pentgp;
	local row = `row' + 1;
};
matrix list des;	




drop _all;
svmat des;
rename des1 mean;
rename des2 sd;
rename des5 wgtd_mean;
rename des6 wgtd_sd;
gen var = "";
local n = 1;
foreach var in `dvars' {;
	local varname: word `n' of `dvars';
	replace var = "`varname'" if _n==`n';
	local n = `n' + 1;
};
order var;


* rounding means <1 to 3dp along with their sds;

replace sd = round(1000*sd)/1000 if abs(mean)<1;
replace mean = round(1000*mean)/1000 if abs(mean)<1;
replace wgtd_sd = round(1000*wgtd_sd)/1000 if abs(wgtd_mean)<1;
replace wgtd_mean = round(1000*wgtd_mean)/1000 if abs(wgtd_mean)<1;


* rounding means 1-10 to 2dp along with their sds;

replace sd = round(100*sd)/100 if abs(mean)>=1 & abs(mean)<10; 
replace mean = round(100*mean)/100 if abs(mean)>=1 & abs(mean)<10;
replace wgtd_sd = round(100*wgtd_sd)/100 if abs(wgtd_mean)>=1 & abs(wgtd_mean)<10; 
replace wgtd_mean = round(100*wgtd_mean)/100 if abs(wgtd_mean)>=1 & abs(wgtd_mean)<10;


* rounding means 10-100 to 1dp along with their sds;
#delimit ;
replace sd = round(10*sd)/10 if abs(mean)>=10 & abs(mean)<100; 
replace mean = round(10*mean)/10 if abs(mean)>=10 & abs(mean)<100;
replace wgtd_sd = round(10*wgtd_sd)/10 if abs(wgtd_mean)>=10 & abs(wgtd_mean)<100; 
replace wgtd_mean = round(10*wgtd_mean)/10 if abs(wgtd_mean)>=10 & abs(wgtd_mean)<100;


* rounding means >100 to 0dp along with their sds;
#delimit ;
replace sd = round(sd) if abs(mean)>=100; 
replace mean = round(mean) if abs(mean)>=100;
replace wgtd_sd = round(wgtd_sd) if abs(wgtd_mean)>=100; 
replace wgtd_mean = round(wgtd_mean) if abs(wgtd_mean)>=100;
	

/* this section creates confidentialised versions of num_firms and num_firm_yr; 
grr des3;
drop des3;
replace rr3des3 = . if _n>1;
rename rr3des3 num_firms;
label var num_firms "Number of distinct pents, rr3";

grr des4;
drop des4;
replace rr3des4 = . if _n>1;
rename rr3des4 num_firm_yr;
label var num_firm_yr "Number of pent-years, rr3";
*/
rename des3 num_firms; * drop this line when using confidentialisation code above;
rename des4 num_firm_yr; * drop this line when using confidentialisation code above;


label var wgtd_mean "Head count-weighted mean";
label var wgtd_sd "Head count-weighted standard deviation";

save "`outfile'_firm_Q_rest", replace;


replace num_firms = num_firms[_n-1] if num_firms==.;
replace num_firm_yr = num_firm_yr[_n-1] if num_firm_yr==.;

gen order = .;
local order 1;

foreach var in go WB K M 
	L_hc_t s_hc_f 
	s_hc_lt25 s_hc_25to39 s_hc_40to54 s_hc_55p
	WPc_WP_cat_0 WPc_WP_cat_1 WPc_WP_cat_2 
	{; * L_fte_t L_fte_m L_fte_f s_fte_f WPc_WP_cat_3 WPc_WP_cat_4 s_hc_ten0 s_hc_ten1 s_hc_ten2 ffe_m ffe_f;
	
	replace order = `order' if var=="`var'";
	local order = `order' + 1;
};

drop if order==.;
sort order;

rename mean raw_mean;
rename sd raw_sd;

rename *_mean *1;
rename *_sd *2;
reshape long raw wgtd, i(var num_*) j(type);

label define type 1 "mean" 2 "sd";
label val type type;

rename raw samp_unwgtd;
rename wgtd samp_wgtd;

tostring samp_*, replace force usedisplay;

foreach samp in unwgtd wgtd {;
	replace samp_`samp' = "(" + samp_`samp' + ")" if type==2;
	replace samp_`samp' = subinstr(samp_`samp', "(.","(0.",1);
};

order var type samp_unwgtd samp_wgtd;
label var samp_unwgtd "Mean and SD across firms in restricted sample, unweighted";
label var samp_wgtd "Mean and SD across firms in restricted sample, weighted by firm head count";

sort order type;
drop order;
replace num_firms = . if _n>1;
replace num_firm_yr = . if _n>1;

export excel using "`outfile'_firm_Q_rest.xlsx", sh("clean") sheetrep first(varl); 



	
	

*******************************************************************************;
* Reading in and preparing individual-level IDI data;
*******************************************************************************;

local dvars "
	female 
	age_lt25 age_25to39 age_40to54 age_55p
	Wpm_hp_f Wpm_hp_m
	indiv_avfte_hp_f indiv_avfte_hp_m
	hrs_main_HLFS_f hrs_main_HLFS_m
	L_hc multiplant
	ffe_m ffe_f wfe_m wfe_f wfe_t wkd_hpp_1ya wkd_hpp_2ya";

use GWgap_pr_IDI_v4, clear; 

drop if indiv_avfte_hp==.;


* merging in hours worked for HLFS sample;

merge 1:1 snz_uid year using GWgap_pr_HLFS_v4, keep(master match) keepus(hrs_main_HLFS Q_rest_HLFS) nogen;

gen age_lt25 = age_cat==1;
label var age_lt25 "Aged under 25";
gen age_25to39 = age_cat==2;
label var age_25to39 "Aged 25 to 39";
gen age_40to54 = age_cat==3;
label var age_40to54 "Aged 40 to 54";
gen age_55p = age_cat==4;
label var age_55p "Aged 55 and over";

gen indiv_avfte_hp_f = indiv_avfte_hp if female==1;
gen indiv_avfte_hp_m = indiv_avfte_hp if female==0;
label var indiv_avfte_hp_f "Average FTEs at hp pent in each month worked at hp pent, females only";
label var indiv_avfte_hp_m "Average FTEs at hp pent in each month worked at hp pent, males only";

gen lnWpm_hp_f = lnWpm_hp if female==1;
gen lnWpm_hp_m = lnWpm_hp if female==0;
gen Wpm_hp = exp(lnWpm_hp);
gen Wpm_hp_f = Wpm_hp if female==1;
gen Wpm_hp_m = Wpm_hp if female==0;
label var Wpm_hp_f "Average wage earnings per month worked at hp pent, females only (real $)"; 
label var Wpm_hp_m "Average wage earnings per month worked at hp pent, males only (real $)"; 

gen lnL_hc = ln(L_hc);
label var lnL_hc "Firm head count (ln)";
label var L_hc "Firm size (head count)";

gen hrs_main_HLFS_f = hrs_main_HLFS if female==1;
gen hrs_main_HLFS_m = hrs_main_HLFS if female==0;
label var hrs_main_HLFS_f "Hours worked in main HLFS job, females only";
label var hrs_main_HLFS_m "Hours worked in main HLFS job, males only";

des `dvars', full;

local obs: word count `dvars';
di `obs';

matrix define des = J(`obs',12,.);
local row = 1;

foreach var in `dvars' {;

	* stats for all individuals;
	
	sum `var';
	matrix des[`row',1] = r(mean);
	matrix des[`row',2] = r(sd);
	
	egen snz_uid_gp = group(snz_uid) if r(sample) & `var'<.;
	qui sum snz_uid_gp;
	local num_ids = r(max);
	matrix des[`row',3] = `num_ids';
	
	qui sum year if snz_uid_gp<.;
	local num_id_yr = r(N);
	matrix des[`row',4] = `num_id_yr';
	
	drop snz_uid_gp;
	
	* stats for individuals in restricted sample;
	
	sum `var' if Q_rest==1;
	matrix des[`row',5] = r(mean);
	matrix des[`row',6] = r(sd);
	
	egen snz_uid_gp = group(snz_uid) if Q_rest==1 & `var'<.;
	qui sum snz_uid_gp;
	local num_ids = r(max);
	matrix des[`row',7] = `num_ids';
	
	qui sum year if snz_uid_gp<.;
	local num_id_yr = r(N);
	matrix des[`row',8] = `num_id_yr';
	
	drop snz_uid_gp;
	
	* stats for individuals in restricted HLFSsample;
	
	sum `var' if Q_rest_HLFS==1;
	matrix des[`row',9] = r(mean);
	matrix des[`row',10] = r(sd);
	
	egen snz_uid_gp = group(snz_uid) if Q_rest_HLFS==1 & `var'<.;
	qui sum snz_uid_gp;
	local num_ids = r(max);
	matrix des[`row',11] = `num_ids';
	
	qui sum year if snz_uid_gp<.;
	local num_id_yr = r(N);
	matrix des[`row',12] = `num_id_yr';
	
	drop snz_uid_gp;
	
	local row = `row' + 1;
};
matrix list des;	

drop _all;
svmat des;
rename des1 mean;
rename des2 sd;
rename des5 mean_prod5;
rename des6 sd_prod5;
rename des9 mean_HLFS;
rename des10 sd_HLFS;
gen var = "";
local n = 1;
foreach var in `dvars' {;
	local varname: word `n' of `dvars';
	replace var = "`varname'" if _n==`n';
	local n = `n' + 1;
};
order var;


* rounding means <1 to 3dp along with their sds;

foreach ty in "" "_prod5" "_HLFS" {;
	replace sd`ty' = round(1000*sd`ty')/1000 if abs(mean`ty')<1;
	replace mean`ty' = round(1000*mean`ty')/1000 if abs(mean`ty')<1;


	* rounding means 1-10 to 2dp along with their sds;

	replace sd`ty' = round(100*sd`ty')/100 if abs(mean`ty')>=1 & abs(mean`ty')<10; 
	replace mean`ty' = round(100*mean`ty')/100 if abs(mean`ty')>=1 & abs(mean`ty')<10;


	* rounding means 10-100 to 1dp along with their sds;
	replace sd`ty' = round(10*sd`ty')/10 if abs(mean`ty')>=10 & abs(mean`ty')<100; 
	replace mean`ty' = round(10*mean`ty')/10 if abs(mean`ty')>=10 & abs(mean`ty')<100;


	* rounding means 100+ to 0dp along with their sds;
	replace sd`ty' = round(sd`ty') if abs(mean`ty')>=100 ; 
	replace mean`ty' = round(mean`ty') if abs(mean`ty')>=100;
};
	
/* use this section to confidentialise;	
grr des3, base(0) seed(52124);
drop des3;
rename grr0des3 num_id;
label var num_id "Number of distinct individuals, grr";

grr des4, base(0) seed(52124);
drop des4;
rename grr0des4 num_id_yr;
label var num_id_yr "Number of individual-years, grr";

grr des7, base(0) seed(52124);
drop des7;
rename grr0des7 num_id_prod5;
label var num_id_prod5 "Number of distinct individuals in restricted data set, grr";

grr des8, base(0) seed(52124);
drop des8;
rename grr0des8 num_id_yr_prod5;
label var num_id_yr_prod5 "Number of individual-years in restricted data set, grr";

grr des11, base(0) seed(52124);
drop des11;
rename grr0des11 num_id_HLFS;
label var num_id_HLFS "Number of distinct individuals in restricted HLFS data set, grr";

grr des12, base(0) seed(52124);
drop des12;
rename grr0des12 num_id_yr_HLFS;
label var num_id_yr_HLFS "Number of individual-years in restricted HLFS data set, grr";
*/

* remove the next 6 lines when using the confidentialisation code above;
rename des3 num_id;
rename des4 num_id_yr;
rename des7 num_id_prod5;
rename des8 num_id_yr_prod5;
rename des11 num_id_HLFS;
rename des12 num_id_yr_HLFS;


format num_id num_id_yr num_id_prod5 num_id_yr_prod5 num_id_HLFS num_id_yr_HLFS %9.0f;

label var mean "Mean, all individuals";
label var sd "Std dev,. all individuals";
label var mean_prod5 "Mean, individuals in restricted data set";
label var sd_prod5 "Std dev., individuals in restricted data set";
label var mean_HLFS "Mean, individuals in restricted HLFS data set";
label var sd_HLFS "Std dev., individuals in restricted HLFS data set";

* reshaping so means and sds are on alternating lines;

#delimit ;
rename mean mean_all;
rename sd sd_all;

rename mean_* *1;
rename sd_* *2;
reshape long all prod5 HLFS, i(var num_*) j(type);

label define type 1 "mean" 2 "sd";
label val type type;

#delimit ;
rename all samp_full;
rename prod5 samp_rest;
rename HLFS samp_rest_HLFS;

#delimit ;
tostring samp_*, replace force usedisplay;

foreach samp in full rest rest_HLFS {;
	replace samp_`samp' = "(" + samp_`samp' + ")" if type==2;
	replace samp_`samp' = subinstr(samp_`samp', "(.","(0.",1);
};

order var type samp_full samp_rest samp_rest_HLFS;
label var samp_full "Mean and SD for full sample";
label var samp_rest "Mean and SD for restricted IDI sample";
label var samp_rest_HLFS "Mean and SD for restricted HLFS sample";

export excel using "`outfile'_indiv.xlsx", sh("Q_restb") sheetrep first(varl);

 


 
 
 
 
 
 
 
 
 
 
 
